{"embeddings.word_embeddings.weight": "word_emb.weight", "embeddings.position_embeddings.weight": "pos_emb.weight", "embeddings.token_type_embeddings.weight": "sent_emb.weight", "embeddings.layer_norm.weight": "ln.weight", "embeddings.layer_norm.bias": "ln.bias", "encoder.layers.0.self_attn.q_proj.weight": "encoder_stack.block.0.attn.q.weight", "encoder.layers.0.self_attn.q_proj.bias": "encoder_stack.block.0.attn.q.bias", "encoder.layers.0.self_attn.k_proj.weight": "encoder_stack.block.0.attn.k.weight", "encoder.layers.0.self_attn.k_proj.bias": "encoder_stack.block.0.attn.k.bias", "encoder.layers.0.self_attn.v_proj.weight": "encoder_stack.block.0.attn.v.weight", "encoder.layers.0.self_attn.v_proj.bias": "encoder_stack.block.0.attn.v.bias", "encoder.layers.0.self_attn.out_proj.weight": "encoder_stack.block.0.attn.o.weight", "encoder.layers.0.self_attn.out_proj.bias": "encoder_stack.block.0.attn.o.bias", "encoder.layers.1.self_attn.q_proj.weight": "encoder_stack.block.1.attn.q.weight", "encoder.layers.1.self_attn.q_proj.bias": "encoder_stack.block.1.attn.q.bias", "encoder.layers.1.self_attn.k_proj.weight": "encoder_stack.block.1.attn.k.weight", "encoder.layers.1.self_attn.k_proj.bias": "encoder_stack.block.1.attn.k.bias", "encoder.layers.1.self_attn.v_proj.weight": "encoder_stack.block.1.attn.v.weight", "encoder.layers.1.self_attn.v_proj.bias": "encoder_stack.block.1.attn.v.bias", "encoder.layers.1.self_attn.out_proj.weight": "encoder_stack.block.1.attn.o.weight", "encoder.layers.1.self_attn.out_proj.bias": "encoder_stack.block.1.attn.o.bias", "encoder.layers.2.self_attn.q_proj.weight": "encoder_stack.block.2.attn.q.weight", "encoder.layers.2.self_attn.q_proj.bias": "encoder_stack.block.2.attn.q.bias", "encoder.layers.2.self_attn.k_proj.weight": "encoder_stack.block.2.attn.k.weight", "encoder.layers.2.self_attn.k_proj.bias": "encoder_stack.block.2.attn.k.bias", "encoder.layers.2.self_attn.v_proj.weight": "encoder_stack.block.2.attn.v.weight", "encoder.layers.2.self_attn.v_proj.bias": "encoder_stack.block.2.attn.v.bias", "encoder.layers.2.self_attn.out_proj.weight": "encoder_stack.block.2.attn.o.weight", "encoder.layers.2.self_attn.out_proj.bias": "encoder_stack.block.2.attn.o.bias", "encoder.layers.3.self_attn.q_proj.weight": "encoder_stack.block.3.attn.q.weight", "encoder.layers.3.self_attn.q_proj.bias": "encoder_stack.block.3.attn.q.bias", "encoder.layers.3.self_attn.k_proj.weight": "encoder_stack.block.3.attn.k.weight", "encoder.layers.3.self_attn.k_proj.bias": "encoder_stack.block.3.attn.k.bias", "encoder.layers.3.self_attn.v_proj.weight": "encoder_stack.block.3.attn.v.weight", "encoder.layers.3.self_attn.v_proj.bias": "encoder_stack.block.3.attn.v.bias", "encoder.layers.3.self_attn.out_proj.weight": "encoder_stack.block.3.attn.o.weight", "encoder.layers.3.self_attn.out_proj.bias": "encoder_stack.block.3.attn.o.bias", "encoder.layers.4.self_attn.q_proj.weight": "encoder_stack.block.4.attn.q.weight", "encoder.layers.4.self_attn.q_proj.bias": "encoder_stack.block.4.attn.q.bias", "encoder.layers.4.self_attn.k_proj.weight": "encoder_stack.block.4.attn.k.weight", "encoder.layers.4.self_attn.k_proj.bias": "encoder_stack.block.4.attn.k.bias", "encoder.layers.4.self_attn.v_proj.weight": "encoder_stack.block.4.attn.v.weight", "encoder.layers.4.self_attn.v_proj.bias": "encoder_stack.block.4.attn.v.bias", "encoder.layers.4.self_attn.out_proj.weight": "encoder_stack.block.4.attn.o.weight", "encoder.layers.4.self_attn.out_proj.bias": "encoder_stack.block.4.attn.o.bias", "encoder.layers.5.self_attn.q_proj.weight": "encoder_stack.block.5.attn.q.weight", "encoder.layers.5.self_attn.q_proj.bias": "encoder_stack.block.5.attn.q.bias", "encoder.layers.5.self_attn.k_proj.weight": "encoder_stack.block.5.attn.k.weight", "encoder.layers.5.self_attn.k_proj.bias": "encoder_stack.block.5.attn.k.bias", "encoder.layers.5.self_attn.v_proj.weight": "encoder_stack.block.5.attn.v.weight", "encoder.layers.5.self_attn.v_proj.bias": "encoder_stack.block.5.attn.v.bias", "encoder.layers.5.self_attn.out_proj.weight": "encoder_stack.block.5.attn.o.weight", "encoder.layers.5.self_attn.out_proj.bias": "encoder_stack.block.5.attn.o.bias", "encoder.layers.6.self_attn.q_proj.weight": "encoder_stack.block.6.attn.q.weight", "encoder.layers.6.self_attn.q_proj.bias": "encoder_stack.block.6.attn.q.bias", "encoder.layers.6.self_attn.k_proj.weight": "encoder_stack.block.6.attn.k.weight", "encoder.layers.6.self_attn.k_proj.bias": "encoder_stack.block.6.attn.k.bias", "encoder.layers.6.self_attn.v_proj.weight": "encoder_stack.block.6.attn.v.weight", "encoder.layers.6.self_attn.v_proj.bias": "encoder_stack.block.6.attn.v.bias", "encoder.layers.6.self_attn.out_proj.weight": "encoder_stack.block.6.attn.o.weight", "encoder.layers.6.self_attn.out_proj.bias": "encoder_stack.block.6.attn.o.bias", "encoder.layers.7.self_attn.q_proj.weight": "encoder_stack.block.7.attn.q.weight", "encoder.layers.7.self_attn.q_proj.bias": "encoder_stack.block.7.attn.q.bias", "encoder.layers.7.self_attn.k_proj.weight": "encoder_stack.block.7.attn.k.weight", "encoder.layers.7.self_attn.k_proj.bias": "encoder_stack.block.7.attn.k.bias", "encoder.layers.7.self_attn.v_proj.weight": "encoder_stack.block.7.attn.v.weight", "encoder.layers.7.self_attn.v_proj.bias": "encoder_stack.block.7.attn.v.bias", "encoder.layers.7.self_attn.out_proj.weight": "encoder_stack.block.7.attn.o.weight", "encoder.layers.7.self_attn.out_proj.bias": "encoder_stack.block.7.attn.o.bias", "encoder.layers.8.self_attn.q_proj.weight": "encoder_stack.block.8.attn.q.weight", "encoder.layers.8.self_attn.q_proj.bias": "encoder_stack.block.8.attn.q.bias", "encoder.layers.8.self_attn.k_proj.weight": "encoder_stack.block.8.attn.k.weight", "encoder.layers.8.self_attn.k_proj.bias": "encoder_stack.block.8.attn.k.bias", "encoder.layers.8.self_attn.v_proj.weight": "encoder_stack.block.8.attn.v.weight", "encoder.layers.8.self_attn.v_proj.bias": "encoder_stack.block.8.attn.v.bias", "encoder.layers.8.self_attn.out_proj.weight": "encoder_stack.block.8.attn.o.weight", "encoder.layers.8.self_attn.out_proj.bias": "encoder_stack.block.8.attn.o.bias", "encoder.layers.9.self_attn.q_proj.weight": "encoder_stack.block.9.attn.q.weight", "encoder.layers.9.self_attn.q_proj.bias": "encoder_stack.block.9.attn.q.bias", "encoder.layers.9.self_attn.k_proj.weight": "encoder_stack.block.9.attn.k.weight", "encoder.layers.9.self_attn.k_proj.bias": "encoder_stack.block.9.attn.k.bias", "encoder.layers.9.self_attn.v_proj.weight": "encoder_stack.block.9.attn.v.weight", "encoder.layers.9.self_attn.v_proj.bias": "encoder_stack.block.9.attn.v.bias", "encoder.layers.9.self_attn.out_proj.weight": "encoder_stack.block.9.attn.o.weight", "encoder.layers.9.self_attn.out_proj.bias": "encoder_stack.block.9.attn.o.bias", "encoder.layers.10.self_attn.q_proj.weight": "encoder_stack.block.10.attn.q.weight", "encoder.layers.10.self_attn.q_proj.bias": "encoder_stack.block.10.attn.q.bias", "encoder.layers.10.self_attn.k_proj.weight": "encoder_stack.block.10.attn.k.weight", "encoder.layers.10.self_attn.k_proj.bias": "encoder_stack.block.10.attn.k.bias", "encoder.layers.10.self_attn.v_proj.weight": "encoder_stack.block.10.attn.v.weight", "encoder.layers.10.self_attn.v_proj.bias": "encoder_stack.block.10.attn.v.bias", "encoder.layers.10.self_attn.out_proj.weight": "encoder_stack.block.10.attn.o.weight", "encoder.layers.10.self_attn.out_proj.bias": "encoder_stack.block.10.attn.o.bias", "encoder.layers.11.self_attn.q_proj.weight": "encoder_stack.block.11.attn.q.weight", "encoder.layers.11.self_attn.q_proj.bias": "encoder_stack.block.11.attn.q.bias", "encoder.layers.11.self_attn.k_proj.weight": "encoder_stack.block.11.attn.k.weight", "encoder.layers.11.self_attn.k_proj.bias": "encoder_stack.block.11.attn.k.bias", "encoder.layers.11.self_attn.v_proj.weight": "encoder_stack.block.11.attn.v.weight", "encoder.layers.11.self_attn.v_proj.bias": "encoder_stack.block.11.attn.v.bias", "encoder.layers.11.self_attn.out_proj.weight": "encoder_stack.block.11.attn.o.weight", "encoder.layers.11.self_attn.out_proj.bias": "encoder_stack.block.11.attn.o.bias", "encoder.layers.0.linear1.weight": "encoder_stack.block.0.ffn.i.weight", "encoder.layers.0.linear1.bias": "encoder_stack.block.0.ffn.i.bias", "encoder.layers.0.linear2.weight": "encoder_stack.block.0.ffn.o.weight", "encoder.layers.0.linear2.bias": "encoder_stack.block.0.ffn.o.bias", "encoder.layers.1.linear1.weight": "encoder_stack.block.1.ffn.i.weight", "encoder.layers.1.linear1.bias": "encoder_stack.block.1.ffn.i.bias", "encoder.layers.1.linear2.weight": "encoder_stack.block.1.ffn.o.weight", "encoder.layers.1.linear2.bias": "encoder_stack.block.1.ffn.o.bias", "encoder.layers.2.linear1.weight": "encoder_stack.block.2.ffn.i.weight", "encoder.layers.2.linear1.bias": "encoder_stack.block.2.ffn.i.bias", "encoder.layers.2.linear2.weight": "encoder_stack.block.2.ffn.o.weight", "encoder.layers.2.linear2.bias": "encoder_stack.block.2.ffn.o.bias", "encoder.layers.3.linear1.weight": "encoder_stack.block.3.ffn.i.weight", "encoder.layers.3.linear1.bias": "encoder_stack.block.3.ffn.i.bias", "encoder.layers.3.linear2.weight": "encoder_stack.block.3.ffn.o.weight", "encoder.layers.3.linear2.bias": "encoder_stack.block.3.ffn.o.bias", "encoder.layers.4.linear1.weight": "encoder_stack.block.4.ffn.i.weight", "encoder.layers.4.linear1.bias": "encoder_stack.block.4.ffn.i.bias", "encoder.layers.4.linear2.weight": "encoder_stack.block.4.ffn.o.weight", "encoder.layers.4.linear2.bias": "encoder_stack.block.4.ffn.o.bias", "encoder.layers.5.linear1.weight": "encoder_stack.block.5.ffn.i.weight", "encoder.layers.5.linear1.bias": "encoder_stack.block.5.ffn.i.bias", "encoder.layers.5.linear2.weight": "encoder_stack.block.5.ffn.o.weight", "encoder.layers.5.linear2.bias": "encoder_stack.block.5.ffn.o.bias", "encoder.layers.6.linear1.weight": "encoder_stack.block.6.ffn.i.weight", "encoder.layers.6.linear1.bias": "encoder_stack.block.6.ffn.i.bias", "encoder.layers.6.linear2.weight": "encoder_stack.block.6.ffn.o.weight", "encoder.layers.6.linear2.bias": "encoder_stack.block.6.ffn.o.bias", "encoder.layers.7.linear1.weight": "encoder_stack.block.7.ffn.i.weight", "encoder.layers.7.linear1.bias": "encoder_stack.block.7.ffn.i.bias", "encoder.layers.7.linear2.weight": "encoder_stack.block.7.ffn.o.weight", "encoder.layers.7.linear2.bias": "encoder_stack.block.7.ffn.o.bias", "encoder.layers.8.linear1.weight": "encoder_stack.block.8.ffn.i.weight", "encoder.layers.8.linear1.bias": "encoder_stack.block.8.ffn.i.bias", "encoder.layers.8.linear2.weight": "encoder_stack.block.8.ffn.o.weight", "encoder.layers.8.linear2.bias": "encoder_stack.block.8.ffn.o.bias", "encoder.layers.9.linear1.weight": "encoder_stack.block.9.ffn.i.weight", "encoder.layers.9.linear1.bias": "encoder_stack.block.9.ffn.i.bias", "encoder.layers.9.linear2.weight": "encoder_stack.block.9.ffn.o.weight", "encoder.layers.9.linear2.bias": "encoder_stack.block.9.ffn.o.bias", "encoder.layers.10.linear1.weight": "encoder_stack.block.10.ffn.i.weight", "encoder.layers.10.linear1.bias": "encoder_stack.block.10.ffn.i.bias", "encoder.layers.10.linear2.weight": "encoder_stack.block.10.ffn.o.weight", "encoder.layers.10.linear2.bias": "encoder_stack.block.10.ffn.o.bias", "encoder.layers.11.linear1.weight": "encoder_stack.block.11.ffn.i.weight", "encoder.layers.11.linear1.bias": "encoder_stack.block.11.ffn.i.bias", "encoder.layers.11.linear2.weight": "encoder_stack.block.11.ffn.o.weight", "encoder.layers.11.linear2.bias": "encoder_stack.block.11.ffn.o.bias", "encoder.layers.0.norm1.weight": "encoder_stack.block.0.ln1.weight", "encoder.layers.0.norm1.bias": "encoder_stack.block.0.ln1.bias", "encoder.layers.1.norm1.weight": "encoder_stack.block.1.ln1.weight", "encoder.layers.1.norm1.bias": "encoder_stack.block.1.ln1.bias", "encoder.layers.2.norm1.weight": "encoder_stack.block.2.ln1.weight", "encoder.layers.2.norm1.bias": "encoder_stack.block.2.ln1.bias", "encoder.layers.3.norm1.weight": "encoder_stack.block.3.ln1.weight", "encoder.layers.3.norm1.bias": "encoder_stack.block.3.ln1.bias", "encoder.layers.4.norm1.weight": "encoder_stack.block.4.ln1.weight", "encoder.layers.4.norm1.bias": "encoder_stack.block.4.ln1.bias", "encoder.layers.5.norm1.weight": "encoder_stack.block.5.ln1.weight", "encoder.layers.5.norm1.bias": "encoder_stack.block.5.ln1.bias", "encoder.layers.6.norm1.weight": "encoder_stack.block.6.ln1.weight", "encoder.layers.6.norm1.bias": "encoder_stack.block.6.ln1.bias", "encoder.layers.7.norm1.weight": "encoder_stack.block.7.ln1.weight", "encoder.layers.7.norm1.bias": "encoder_stack.block.7.ln1.bias", "encoder.layers.8.norm1.weight": "encoder_stack.block.8.ln1.weight", "encoder.layers.8.norm1.bias": "encoder_stack.block.8.ln1.bias", "encoder.layers.9.norm1.weight": "encoder_stack.block.9.ln1.weight", "encoder.layers.9.norm1.bias": "encoder_stack.block.9.ln1.bias", "encoder.layers.10.norm1.weight": "encoder_stack.block.10.ln1.weight", "encoder.layers.10.norm1.bias": "encoder_stack.block.10.ln1.bias", "encoder.layers.11.norm1.weight": "encoder_stack.block.11.ln1.weight", "encoder.layers.11.norm1.bias": "encoder_stack.block.11.ln1.bias", "encoder.layers.0.norm2.weight": "encoder_stack.block.0.ln2.weight", "encoder.layers.0.norm2.bias": "encoder_stack.block.0.ln2.bias", "encoder.layers.1.norm2.weight": "encoder_stack.block.1.ln2.weight", "encoder.layers.1.norm2.bias": "encoder_stack.block.1.ln2.bias", "encoder.layers.2.norm2.weight": "encoder_stack.block.2.ln2.weight", "encoder.layers.2.norm2.bias": "encoder_stack.block.2.ln2.bias", "encoder.layers.3.norm2.weight": "encoder_stack.block.3.ln2.weight", "encoder.layers.3.norm2.bias": "encoder_stack.block.3.ln2.bias", "encoder.layers.4.norm2.weight": "encoder_stack.block.4.ln2.weight", "encoder.layers.4.norm2.bias": "encoder_stack.block.4.ln2.bias", "encoder.layers.5.norm2.weight": "encoder_stack.block.5.ln2.weight", "encoder.layers.5.norm2.bias": "encoder_stack.block.5.ln2.bias", "encoder.layers.6.norm2.weight": "encoder_stack.block.6.ln2.weight", "encoder.layers.6.norm2.bias": "encoder_stack.block.6.ln2.bias", "encoder.layers.7.norm2.weight": "encoder_stack.block.7.ln2.weight", "encoder.layers.7.norm2.bias": "encoder_stack.block.7.ln2.bias", "encoder.layers.8.norm2.weight": "encoder_stack.block.8.ln2.weight", "encoder.layers.8.norm2.bias": "encoder_stack.block.8.ln2.bias", "encoder.layers.9.norm2.weight": "encoder_stack.block.9.ln2.weight", "encoder.layers.9.norm2.bias": "encoder_stack.block.9.ln2.bias", "encoder.layers.10.norm2.weight": "encoder_stack.block.10.ln2.weight", "encoder.layers.10.norm2.bias": "encoder_stack.block.10.ln2.bias", "encoder.layers.11.norm2.weight": "encoder_stack.block.11.ln2.weight", "encoder.layers.11.norm2.bias": "encoder_stack.block.11.ln2.bias", "pooler.dense.weight": "pooler.weight", "pooler.dense.bias": "pooler.bias"}